# -------------------------------------------------------------------
# Purpose: Creates Figure 1
# Author:  Max Posch, 25/07/2025
# Usage:   Source this script to generate both panels of the figure
# -------------------------------------------------------------------
# Check that required paths exist
stopifnot(dir.exists(pdataanalysis))
stopifnot(dir.exists(poutput))


# Load data
load(file.path(pdataanalysis, "countyLevel19001940.RData"))


# Left panel: Patents per 1,000 people
temp <- copy(countyLevel19001940) %>% drop_na(iv_lo_entropy_namelast_mp_adjp_fe_immig_ws, iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws, sum_patents_pc_1900_f_w)
temp[, iv_lo_entropy_namelast_mp_adjp_fe_immig_ws := resid(feols(iv_lo_entropy_namelast_mp_adjp_fe_immig_ws ~ iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws | gisjoin_1900 + statefip^year, data = temp))]
temp[, sum_patents_pc_1900_f_w := resid(feols(sum_patents_pc_1900_f_w ~ iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws | gisjoin_1900 + statefip^year, data = temp))]
est1 <- binsreg(sum_patents_pc_1900_f_w, iv_lo_entropy_namelast_mp_adjp_fe_immig_ws, data = temp)
result <- est1$data.plot$`Group Full Sample`
p <- ggplot() +
  labs(
    x = "Predicted surname diversity",
    y = NULL,
    subtitle = "Patents per 1,000 people",
  ) +
  geom_point(data = result$data.dots, aes(x = x, y = fit), size = 2, alpha = 1) +
  theme_minimal()
plotname <- file.path(poutput, "figure01left.eps")
ggsave(plotname, p, width = 4.6, height = 4.6, units = "in")

cat("Figure 1 left saved to:", plotname, "\n")


# Right panel: Breakthrough patents per 1,000 people
temp <- copy(countyLevel19001940) %>% drop_na(iv_lo_entropy_namelast_mp_adjp_fe_immig_ws, iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws, sum_break_p80_rrfsim05_pc_1900_f_w)
temp[, iv_lo_entropy_namelast_mp_adjp_fe_immig_ws := resid(feols(iv_lo_entropy_namelast_mp_adjp_fe_immig_ws ~ iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws | gisjoin_1900 + statefip^year, data = temp))]
temp[, sum_break_p80_rrfsim05_pc_1900_f_w := resid(feols(sum_break_p80_rrfsim05_pc_1900_f_w ~ iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws | gisjoin_1900 + statefip^year, data = temp))]
est1 <- binsreg(sum_break_p80_rrfsim05_pc_1900_f_w, iv_lo_entropy_namelast_mp_adjp_fe_immig_ws, data = temp)
result <- est1$data.plot$`Group Full Sample`
p <- ggplot() +
  labs(
    x = "Predicted surname diversity",
    y = NULL,
    subtitle = "Breakthrough patents per 1,000 people",
  ) +
  geom_point(data = result$data.dots, aes(x = x, y = fit), size = 2, alpha = 1) +
  theme_minimal()
plotname <- file.path(poutput, "figure01right.eps")
ggsave(plotname, p, width = 4.6, height = 4.6, units = "in")

cat("Figure 1 right saved to:", plotname, "\n")
